## Modeling for the Elections.  Three sets (66, 68, or 5-years)
## All States, front page, 6/1 - 12/31 of given year


# Start from an empty workspace (hidden objects included).
rm(list = ls(all.names = TRUE))
# Name of the output directory; fill in where you want results written to.
dirname <- "FiveElections"

## Import and Inspect the Data
# Fill in with your local directory.
setwd(YOURLOCAL)
#Load the Files
library(lda)
#data <- read.csv("66election_unigrams.csv") #preprocessed with 1%, 99% thresholds in theory
#data <- read.csv("68election_unigrams.csv")

#meta <- read.csv("66meta.csv")
#meta <- read.csv("68meta.csv")
# Set this flag to TRUE when working with the 5 elections data.
fiveflag <- TRUE
if (isTRUE(fiveflag)) {
  # Read one election's metadata file and add a `filename` column made of the
  # given directory prefix pasted onto the `File` column. This allows the
  # metadata to be merged against the document names later on.
  #   csv    - metadata file to read
  #   prefix - directory extension to put in front of each `File` entry
  # Returns the metadata data frame with the extra `filename` column.
  read_election_meta <- function(csv, prefix) {
    m <- read.csv(csv)
    # Vectorised on purpose: a `for (i in 1:nrow(m))` loop would run with
    # i = 1 and i = 0 on a zero-row file and fail.
    m$filename <- paste0(prefix, as.character(m$File))
    m
  }

  dir <- getwd()
  setwd(DIRECTORYWHEREDOCUMENTSARE)
  data <- read.csv(file = "ElectionTDM.csv")
  meta66 <- read_election_meta("66meta.csv", "Election1866/")
  meta68 <- read_election_meta("68meta.csv", "Election1868v2/")
  meta70 <- read_election_meta("70meta.csv", "70election/")
  meta72 <- read_election_meta("72meta.csv", "72election/")
  meta84 <- read_election_meta("84meta.csv", "84election/")
  meta <- rbind(meta66, meta68, meta70, meta72, meta84)
  # Go back to the directory we started from.
  setwd(dir)
}

coha <- read.table("CohaUnigrams.txt") # see readme.
coha <- coha[coha$V3 < 11, ] # subsetting to the 19th century, we have 966,587 entries
# Total V1 per distinct V2 (the word); we have 250,453 words now.
out <- aggregate(coha$V1, list(word = coha$V2), sum, na.rm = TRUE)

# Create the output directory when it is missing, so that setwd() does not
# fail the first time the script is run.
if (!dir.exists(dirname)) {
  dir.create(dirname)
}
setwd(dirname)

###########
# Cut words based on COHA data
###########
## Keep only the words with more than `mincount` appearances in COHA
mincount <- 500
minchar <- 20 # parameter value set arbitrarily high but left in script for experimentation

# Words occurring `mincount` times or fewer are written out as the cut list.
cut <- out$word[out$x <= mincount]
write.csv(cut, "cutwordsCOHA.csv", row.names = FALSE, quote = FALSE)

# From here on `coha` is the character vector of words to keep.
coha <- as.character(out$word[out$x > mincount])

rm(out, cut) # remove unneeded pieces

dim(data)
# The first column holds the document names; the rest are word counts.
docnames <- data[, 1]
data <- data[, -1, drop = FALSE]

# We start by removing all words with less than 5 characters.
# A logical mask is used rather than `-which(...)`: when no name is short,
# `which()` returns integer(0) and `data[, -integer(0)]` would select zero
# columns, silently throwing the whole vocabulary away.
too.short <- nchar(names(data)) < 5
write.csv(names(data)[too.short], "cutbyLength.csv", row.names = FALSE, quote = FALSE)
# drop = FALSE keeps a data frame even if a single column survives.
data <- data[, !too.short, drop = FALSE]

# Now we want to remove any word of at most `minchar` characters which isn't
# in the coha list.
if (fiveflag == FALSE) { # Condition on it not being the five elections data due to stemming.
  obs.words <- names(data)
  words.check <- obs.words[nchar(obs.words) <= minchar]

  # Logical negation instead of `-which(...)`: with an empty match set,
  # `-which(...)` is integer(0) and selects nothing. That would make the cut
  # list empty when no word is in COHA, and would drop every column of `data`
  # when the cut list is empty.
  cut <- words.check[!(words.check %in% coha)]
  write.csv(cut, "cutbyCOHA.csv", row.names = FALSE, quote = FALSE)
  data <- data[, !(names(data) %in% cut), drop = FALSE]
}

# Document frequency of each remaining word: the number of rows of `data` in
# which its count is positive, written out from most to least frequent.
freq <- vapply(data, function(counts) sum(counts > 0), integer(1))
by.freq <- order(freq, decreasing = TRUE)
write.csv(freq[by.freq], "wordsWfreq.csv", row.names = TRUE, quote = FALSE)



#############
# Process Docs for Modeling
#############
# One two-row matrix per row of `data`: the first row holds the 0-based
# column indices of the words with a positive count, the second row the
# counts themselves.
# lapply() is used instead of apply(): apply() simplifies its result to a
# single matrix whenever every row yields the same number of entries, in
# which case `documents` would no longer be a list of per-document matrices.
counts <- as.matrix(data)
documents <- lapply(seq_len(nrow(counts)), function(d) {
  y <- counts[d, ]
  present <- which(y > 0)
  rbind(present - 1L, y[present])
})
# apply() names the list after the row names when there are any; keep that.
names(documents) <- rownames(counts)
rm(counts)

vocab <- colnames(data)

#####
# Meta-Data Parse
#####
titles <- as.character(meta$title)
test <- strsplit(titles, split = "-") # splits range from 5 to 7, approach from back

# Last slot should be nothing useful,
#  second to last is year, before that date, before that components of title

# Have to check for EXTRA and EDITION (and similar): each marker found
# anywhere in a title moves the expected year/day position one slot forward.
extra.markers <- c("EDITION", "EXTRA", "SUPPLEMENT", "Supplement", "Morning")

# Preallocate with the "could not parse" values so that the loop only has to
# fill in the titles that split into enough pieces.
n.meta <- nrow(meta)
titleonly <- rep("Unknown", n.meta)
year <- rep(NA_character_, n.meta)
day <- rep(NA_character_, n.meta)
for (i in seq_len(n.meta)) {
  pieces <- test[[i]]
  n.extra <- sum(vapply(
    extra.markers,
    function(m) length(grep(m, pieces)) != 0,
    logical(1)
  ))
  l <- length(pieces) - n.extra
  # We need at least one title piece, the day, the year and the trailing
  # slot. With fewer pieces the indices below would be zero or negative,
  # which either raises an error or silently picks the wrong pieces.
  if (length(pieces) > 1 && l >= 4) {
    year[i] <- pieces[l - 1]
    titleonly[i] <- paste(pieces[1:(l - 3)], collapse = "")
    day[i] <- pieces[l - 2]
  }
}
meta$year <- year
meta$titleonly <- titleonly
meta$day <- day


########
# Merging Parsed Meta Data with Original Info
#######
docnames <- as.character(docnames)

# Column of `meta` that identifies a document.
meta.key <- if (fiveflag == TRUE) "filename" else "File"

# Look the metadata up by position instead of using merge(): merge() leaves
# the row order unspecified when sort = FALSE and drops documents without a
# match, while the code further down indexes `fullset` by document position
# and therefore needs exactly one row per document, in document order.
idx <- match(docnames, as.character(meta[[meta.key]]))
if (anyNA(idx)) {
  warning(sum(is.na(idx)),
          " document(s) have no matching metadata row; their metadata is NA",
          call. = FALSE)
}
meta.rows <- meta[idx, setdiff(names(meta), meta.key), drop = FALSE]
rownames(meta.rows) <- NULL
# `V2` is the position of the document in `docnames` (the name merge() gave
# this column).
fullset <- data.frame(docnames = docnames, V2 = seq_along(docnames), meta.rows,
                      stringsAsFactors = FALSE, check.names = FALSE)
rm(meta, meta.rows) # remove the meta data matrix to avoid confusion.


#########
#LDA
#########
set.seed(12345)

# Hyperparameters. Originally: alpha 1, eta 1.2, k=20; the five elections
# data uses .01 for both alpha and eta.
if (fiveflag == TRUE) {
  alpha <- .01
  eta <- .01
} else {
  alpha <- 1
  eta <- 1.2
}

K <- 20
result <- lda.collapsed.gibbs.sampler(
  documents = documents, # This is the set of documents
  K = K,                 # This is the number of clusters
  vocab = vocab,         # This is the vocab set
  num.iterations = 25,   # These are additional model parameters
  alpha = alpha,
  eta = eta
)


## A built in labeling function for this model: top 100 words per topic
top.words <- top.topic.words(result$topics, num.words = 100, by.score = TRUE)
top.words

# library() rather than require(): a missing package then stops the script
# here instead of failing later, inside the loop, with a PDF device open.
library(wordcloud)

# Put new fonts in the system
library(extrafont)
# font_import()  # import one time, load each time
loadfonts()

# Note: a lot of what makes this complicated was trying to produce the wordclouds in YLJ's font.  You can ignore most of this if just using the code.

# For Windows - in each session, adjust the path to match your installation of Ghostscript.
# Only set on Windows, so that other systems are not pointed at a path that
# does not exist there.
if (.Platform$OS.type == "windows") {
  Sys.setenv(R_GSCMD = "C:/Program Files/gs/gs9.06/bin/gswin64c.exe")
}

# One word cloud PDF per topic, weighted by that topic's row of result$topics.
for (i in seq_len(K)) {
  temp <- result$topics[i, ]
  cloud.file <- paste0("WordCloud.Topic", i, ".pdf")

  pdf(file = cloud.file, width = 4.5, height = 4.5, family = "YaleDesign")
  wordcloud(names(temp), temp, scale = c(3, .375), max.words = 250)
  dev.off()
  # If you don't specify 'outfile', it will overwrite the original file
  embed_fonts(cloud.file)
}


tab <- top.words

# Ten top documents for every topic.
topicdocs <- top.topic.documents(result$document_sums, num.documents = 10)

# Share of each topic within each document: every column of document_sums
# (one column per document) is divided by its total. The result has one row
# per document and one column per topic.
norm <- colSums(result$document_sums)
docprops <- unname(t(result$document_sums) / norm)
# First column: the document link, with the trailing "seq-1/" turned into a
# PDF file name.
docprops <- cbind(
  gsub("seq-1/", "seq-1.pdf", as.character(fullset$link)),
  docprops
)
write.table(docprops, "DocumentLoadings.csv", sep = ",", row.names = FALSE)

#########
#Writing Out Results
## note this generate lots of useful results including top words etc.
#########
# Links of the ten top documents of every topic, one column per topic.
append <- vapply(
  seq_len(K),
  function(k) as.character(fullset$link[topicdocs[, k]]),
  character(10)
)
append <- gsub("seq-1/", "seq-1.pdf", append)

# Share of all topic assignments that falls to each topic.
props <- result$topic_sums / sum(result$topic_sums)

# Top words, then top-document links, then the topic proportions.
tab <- rbind(tab, append, t(props))

write.table(tab, "Topics20.csv", sep = ",", row.names = FALSE)

# Build a Date from the parsed day and year strings.
fullset$date <- as.Date(paste(fullset$day, fullset$year), format = " %B %d %Y")

# theta: one row per document, one column per topic; each document's topic
# counts divided by the length of that document.
doc.len <- document.lengths(documents)
theta <- unname(t(result$document_sums) / doc.len)

library(gam)
# For every topic: plot its mean loading per date with a loess curve, then
# append one value taken from the anova table of a smooth-in-date gam fit to
# HypothesisTests.csv.
# NOTE(review): the file is opened with append = TRUE, so a re-run adds to
# the rows of earlier runs.
for (k in seq_len(K)) {
  aggdata <- aggregate(theta[, k], by = list(fullset$date), FUN = mean, na.rm = TRUE)
  topic.pdf <- paste0("Topic", k, ".pdf")

  pdf(file = topic.pdf, width = 4.5, height = 4.5, family = "YaleDesign")
  par(mar = c(5, 4, 2.5, 2))
  plot(aggdata[, 1], aggdata$x,
       main = paste("Topic", k, "Over Time"),
       ylab = "Topic Proportion", xlab = "Date")
  lines(loess.smooth(aggdata[, 1], aggdata$x), lwd = 4, col = "black")
  dev.off()
  embed_fonts(topic.pdf)

  # hypothesis tests
  hyp.test <- round(summary(gam(aggdata$x ~ s(aggdata$Group.1)))$anova[4][2, 1], 3)
  write.table(hyp.test, file = "HypothesisTests.csv",
              append = TRUE, col.names = FALSE, row.names = FALSE)
}

# Number of documents per newspaper title.
write.csv(table(fullset$titleonly), file = "Papers.csv")



###########
# Word Clouds
###########

# Total count of every word over all documents, drawn as a single cloud.
wordfreqs <- colSums(data)
corpus.pdf <- "CorpusCloud.pdf"
pdf(file = corpus.pdf, width = 4.5, height = 4.5, family = "YaleDesign")
wordcloud(names(wordfreqs), wordfreqs, scale = c(3, .375), max.words = 250)
dev.off()
embed_fonts(corpus.pdf)


# Leave the output directory again.
setwd("..")


###################
# Keywords
####################

# Weekly mean counts of five keyword stems, plotted per election year and
# over all weeks, then written to WeeklyWordUseData.csv. Only run for the
# five elections data.
if (fiveflag == TRUE) {
  # Guarded so that a re-run does not warn about an existing directory.
  if (!dir.exists("Keywords")) {
    dir.create("Keywords")
  }
  setwd("Keywords")

  fullset$week <- as.factor(format(fullset$date, "%Y%W"))
  fullset$pretty <- format(fullset$date, "Week %W, %Y")

  # Mean count of every keyword per year-week.
  keywords <- c("constitut", "reconstruct", "unconstitut", "amend", "convent")
  aggdata <- aggregate(data[, keywords],
                       by = list(fullset$week),
                       FUN = mean, na.rm = TRUE)

  # Group.1 is the "%Y%W" string: characters 1-4 are the year, 5-6 the week.
  aggdata$year <- as.numeric(substr(aggdata$Group.1, 1, 4))
  aggdata$week <- as.numeric(substr(aggdata$Group.1, 5, 6))
  aggdata$weeknumber <- seq_len(nrow(aggdata))

  years <- c(1866, 1868, 1870, 1872, 1884)
  # Positions on the weeknumber axis at which the year labels are drawn.
  # NOTE(review): hard-coded; presumably the first week of each election year
  # in the original data - confirm if the corpus changes.
  year.ticks <- c(1, 33, 64, 95, 127)
  y.label <- "Mean Count by Week"

  # Writes `file` with one panel per election year (weekly means of `word`
  # plus a loess curve, y axis from 0 to `ymax`) and a final panel over all
  # weeks titled `title`.
  plot_year_panels <- function(file, word, title, ymax) {
    pdf(file = file, width = 8, height = 12, family = "YaleDesign")
    par(mfrow = c(3, 2))
    for (yr in years) {
      in.year <- aggdata$year == yr
      plot(aggdata[in.year, "week"], aggdata[in.year, word], main = yr,
           xlab = "Week Number", ylab = y.label, ylim = c(0, ymax))
      lines(loess.smooth(aggdata[in.year, "week"], aggdata[in.year, word]),
            lwd = 2, col = "blue")
    }
    plot(aggdata[, 1], aggdata[, word], main = title,
         xlab = "Concatenated Weeks", ylab = y.label)
    lines(loess.smooth(as.numeric(aggdata[, 1]), aggdata[, word], span = .25),
          lwd = 2, col = "blue")
    dev.off()
    embed_fonts(file)
  }

  # Draws the "Broad Trend" panel for `word` on the current device: weekly
  # means against the running week number, with a loess curve and year labels
  # on the x axis.
  plot_broad_trend <- function(word) {
    plot(aggdata$weeknumber, aggdata[, word], main = "Broad Trend",
         xlab = "Individual Weeks", ylab = y.label, type = "n", xaxt = "n")
    axis(1, at = year.ticks, labels = as.character(years))
    lines(loess.smooth(aggdata$weeknumber, aggdata[, word], span = .25),
          lwd = 3, col = "black")
    points(aggdata$weeknumber, aggdata[, word])
  }

  # Writes `file` with two panels for `word`: "Local Trends" (one line per
  # election year against the week of the year, each labelled at x = 22 with
  # the year's first value) and the broad trend. `label.offset` shifts the
  # year labels vertically, one value per entry of `years`.
  plot_trends <- function(file, word, label.offset = rep(0, length(years))) {
    pdf(file = file, width = 9.5, height = 5, family = "YaleDesign")
    par(mfrow = c(1, 2))
    plot(aggdata$week, aggdata[, word], main = "Local Trends",
         xlab = "Week Number", ylab = y.label, type = "n",
         xlim = c(19, 52), cex = .5)
    for (j in seq_along(years)) {
      in.year <- aggdata$year == years[j]
      lines(aggdata$week[in.year], aggdata[in.year, word], lty = j)
    }
    for (j in seq_along(years)) {
      in.year <- aggdata$year == years[j]
      text(22, aggdata[in.year, word][1] + label.offset[j],
           labels = as.character(years[j]), pos = 2)
    }
    plot_broad_trend(word)
    dev.off()
    embed_fonts(file)
  }

  # Constitution
  plot_year_panels("Constitution.pdf", "constitut", "Use of 'Constitution'", ymax = 4.5)
  plot_trends("Constitution2.pdf", "constitut")

  # Broad trend on its own page.
  pdf(file = "Constitution3.pdf", width = 4.8, height = 4.8, family = "YaleDesign")
  par(mfrow = c(1, 1))
  plot_broad_trend("constitut")
  dev.off()
  # Fonts are embedded here as for every other PDF this script writes.
  embed_fonts("Constitution3.pdf")

  # Reconstruction
  plot_year_panels("Reconstruction.pdf", "reconstruct", "Use of 'Reconstruction'", ymax = 1)
  plot_trends("Reconstruction2.pdf", "reconstruct")

  # Unconstitutional
  plot_year_panels("Unconstitutional.pdf", "unconstitut", "Use of 'Unconstitutional'", ymax = .3)
  plot_trends("Unconstitutional2.pdf", "unconstitut")

  # Amendment
  plot_year_panels("Amend.pdf", "amend", "Use of 'Amend'", ymax = 4.2)
  plot_trends("Amend2.pdf", "amend")

  # Convention (the 1866 label is drawn .5 lower than the line's first value)
  plot_year_panels("Convention.pdf", "convent", "Use of 'Convent-'", ymax = 10)
  plot_trends("Convention2.pdf", "convent", label.offset = c(-.5, 0, 0, 0, 0))

  write.csv(aggdata, file = "WeeklyWordUseData.csv")
}









